#########################################################################
# FigureE1.R
#########################################################################

library(tidyverse)

#########################################################################
# 1. Process Data
#########################################################################


# Load the raw survey export, then:
#   1. drop the first two parsed rows
#      (NOTE(review): presumably Qualtrics' extra header/metadata rows --
#      confirm against the export file),
#   2. relabel the columns numbered 17 and 18 as 9 and 10 so that both
#      questions use the column labels 1-10,
#   3. add a sequential respondent id.
dt <- read.csv("qualtrics_Oakland.csv")
dt <- dt[-(1:2), ]
dt <- dt %>%
  rename(C1_9 = C1_17, C1_10 = C1_18) %>%
  rename(C2_9 = C2_17, C2_10 = C2_18) %>%
  mutate(id = seq_len(n()))


#########################################################################
# 2. Forced Ranking
#########################################################################

# Reference choice set: element k is the candidate attached to the ranking
# columns C1_k and C2_k when the survey data are reshaped.
ref_set <- c(
  "Scott",      # 1
  "Hodge",      # 2
  "Taylor",     # 3
  "Liu",        # 4
  "Thao",       # 5
  "Fuente",     # 6
  "Villanueva", # 7
  "Reimann",    # 8
  "Jordan",     # 9
  "Reid"        # 10
)

# Decompose ballot position x candidate (forced-ranking question, C2).
# Result: one row per respondent x candidate, with columns id, name, rank,
# followed by the remaining C2 columns.
dtc <- dt %>%
  # Split the "|"-delimited C2_DO string into C2_pos1 ... C2_pos10.
  # remove = FALSE keeps C2_DO at this step; it is dropped by the select below.
  separate(
    C2_DO,
    into = paste0("C2_pos", 1:10),
    sep = "[|]",
    remove = FALSE
  ) %>%
  dplyr::select(id, starts_with("C2"), -c("C2_DO")) %>%
  pivot_longer(
    cols = C2_1:C2_10,
    values_to = "rank"
  ) %>%
  dplyr::select(id, name, rank, everything()) %>%
  mutate(
    # Translate the column label C2_k into the k-th reference candidate.
    # Labels that are not one of C2_1 ... C2_10 become NA, exactly as the
    # previous ten-branch case_when() did.
    name = ref_set[match(name, paste0("C2_", seq_along(ref_set)))],
    # Survey values arrive as text; convert the rank to a number.
    rank = as.numeric(rank)
  )

# Rows per candidate in the reshaped data, whatever the rank value (NA included).
total_counts <- dtc %>%
  count(name, name = "total_count")

# First-choice rows only; rows with a missing rank are dropped by filter().
filtered_data <- dtc %>% filter(rank == 1)

# First-choice tally per candidate.
name_counts <- filtered_data %>%
  count(name, name = "count")

# Attach the per-candidate totals and express the first-choice share in percent.
# NOTE(review): the denominator here is every row for the candidate, whereas
# the confidence intervals computed further below are based on the number of
# first-choice rows -- confirm that both denominators are intended.
merged_data <- name_counts %>%
  left_join(total_counts, by = "name") %>%
  mutate(percentage = count / total_count * 100)

# Show the table
print(merged_data)

################################################################################
### Estimate the voter share and compute SE

# First-choice counts per candidate and their shares of all first-choice rows.
name_counts <- table(filtered_data$name)
name_proportions <- prop.table(name_counts)

# Standard error of each share, with n = total number of first-choice rows.
name_se <- sqrt(name_proportions * (1 - name_proportions) / sum(name_counts))

# Two-sided 90% normal-approximation confidence intervals.
# The script used to assign qnorm(0.975) (95%) and then immediately overwrite
# it with qnorm(0.95); only the latter was ever used, so the dead assignment
# is gone and the variable is named for the level actually applied.
z_score_90 <- qnorm(0.95)

ci_lower <- name_proportions - z_score_90 * name_se
ci_upper <- name_proportions + z_score_90 * name_se

# Collect the results. The table-valued arguments are expanded by data.frame()
# into several columns each; the code below and the later merge rely on the
# resulting CI_Lower.Freq / CI_Upper.Freq column names, so keep this call's
# argument names unchanged.
result_table_full <- data.frame(
  Name = names(name_proportions),
  Proportion = name_proportions,
  Standard_Error = name_se,
  CI_Lower = ci_lower,
  CI_Upper = ci_upper
)

# Put the interval bounds on the percent scale (the other columns are left
# as proportions).
result_table_full$CI_Lower.Freq <- result_table_full$CI_Lower.Freq * 100
result_table_full$CI_Upper.Freq <- result_table_full$CI_Upper.Freq * 100

# Keys used later to match these intervals to the plotted points.
result_table_full$dataset <- "Forced-Ranking Question"
colnames(result_table_full)[1] <- "name"


#########################################################################
# 3. Optional Ranking
#########################################################################

# Decompose ballot position x candidate (optional-ranking question, C1).
# Result: one row per respondent x candidate, with columns id, name,
# selection, rank, followed by the remaining C1 columns.
dtc <- dt %>%
  # Split the "|"-delimited C1_DO string into C1_pos1 ... C1_pos10.
  # remove = FALSE keeps C1_DO at this step; it is dropped by the select below.
  separate(
    C1_DO,
    into = paste0("C1_pos", 1:10),
    sep = "[|]",
    remove = FALSE
  ) %>%
  dplyr::select(id, starts_with("C1"), -c("C1_DO")) %>%
  pivot_longer(
    cols = C1_1:C1_10,
    values_to = "rank"
  ) %>%
  mutate(
    # Translate the column label C1_k into the k-th reference candidate.
    # Labels that are not one of C1_1 ... C1_10 become NA, exactly as the
    # previous ten-branch case_when() did.
    name = ref_set[match(name, paste0("C1_", seq_along(ref_set)))],
    # Survey values arrive as text; convert the rank to a number.
    rank = as.numeric(rank),
    # 1 when the candidate received a rank, 0 when the rank is missing.
    selection = ifelse(is.na(rank), 0, 1)
  ) %>%
  dplyr::select(id, name, selection, rank, everything())


# Rows per candidate in the reshaped data, whatever the rank value (NA included).
total_counts <- dtc %>%
  count(name, name = "total_count")

# First-choice rows only; rows with a missing rank are dropped by filter().
filtered_data <- dtc %>% filter(rank == 1)

# First-choice tally per candidate.
name_counts <- filtered_data %>%
  count(name, name = "count")

# Attach the per-candidate totals and express the first-choice share in percent.
# NOTE(review): the denominator here is every row for the candidate (ranked or
# not), whereas the confidence intervals computed further below are based on
# the number of first-choice rows -- confirm that both denominators are
# intended.
merged_data1 <- name_counts %>%
  left_join(total_counts, by = "name") %>%
  mutate(percentage = count / total_count * 100)

# Show the table
print(merged_data1)

################################################################################
### Estimate the voter share and compute SE

# First-choice counts per candidate and their shares of all first-choice rows.
name_counts <- table(filtered_data$name)
name_proportions <- prop.table(name_counts)

# Standard error of each share, with n = total number of first-choice rows.
name_se <- sqrt(name_proportions * (1 - name_proportions) / sum(name_counts))

# Two-sided 90% normal-approximation confidence intervals.
# The script used to assign qnorm(0.975) (95%) and then immediately overwrite
# it with qnorm(0.95); only the latter was ever used, so the dead assignment
# is gone and the variable is named for the level actually applied.
z_score_90 <- qnorm(0.95)

ci_lower <- name_proportions - z_score_90 * name_se
ci_upper <- name_proportions + z_score_90 * name_se

# Collect the results. The table-valued arguments are expanded by data.frame()
# into several columns each; the code below and the later merge rely on the
# resulting CI_Lower.Freq / CI_Upper.Freq column names, so keep this call's
# argument names unchanged.
result_table <- data.frame(
  Name = names(name_proportions),
  Proportion = name_proportions,
  Standard_Error = name_se,
  CI_Lower = ci_lower,
  CI_Upper = ci_upper
)

# Put the interval bounds on the percent scale (the other columns are left
# as proportions).
result_table$CI_Lower.Freq <- result_table$CI_Lower.Freq * 100
result_table$CI_Upper.Freq <- result_table$CI_Upper.Freq * 100

# Keys used later to match these intervals to the plotted points.
result_table$dataset <- "Optional-Ranking Question"
colnames(result_table)[1] <- "name"


#########################################################################
# 4. Official Records
#########################################################################

##### Visualizing the Oakland mayoral general election - 11/08/2022
#### https://www.acgov.org/rovresults/rcv/248/rcvresults.htm?race=Oakland%2F001-Mayor


# Stack the two survey-based estimates, tagging each row with its question type.
# (A reorder() of `name` used to follow here; it had no effect on the result,
# because the factor is turned back into plain text when the official rows are
# appended below and the plotting order is set again before the figure is
# drawn, so it has been removed.)
mayor_combined_data <- bind_rows(
  mutate(merged_data, dataset = "Forced-Ranking Question"),
  mutate(merged_data1, dataset = "Optional-Ranking Question")
)

# Official election vote share (in percent) for each candidate.
vote_share_mapping <- c(
  "Scott" = 2.98, "Hodge" = 4.62, "Taylor" = 33.07,
  "Liu" = 0.76, "Thao" = 31.79, "Fuente" = 10.27,
  "Villanueva" = 8.27, "Reimann" = 1.01, "Jordan" = 0.69,
  "Reid" = 6.08
)

# Put the official shares in the same name / percentage / dataset layout as
# the survey estimates.
additional_mapping <- data.frame(
  name = names(vote_share_mapping),
  percentage = unname(vote_share_mapping),
  dataset = "Actual Election Data"
)

# Append the official results to mayor_combined_data
mayor_combined_data <- bind_rows(mayor_combined_data, additional_mapping)

# Confidence intervals from both survey questions in one table.
combined_table <- rbind(result_table, result_table_full)

# Attach the interval bounds to the plotting data. all.x = TRUE keeps the
# official rows, which have no interval (their bounds are NA).
# NOTE: merge() returns rows sorted on the key columns by default; check any
# later step that depends on row order before replacing it with a join that
# preserves input order.
mayor_combined_data <- merge(
  mayor_combined_data,
  combined_table[, c("name", "dataset", "CI_Lower.Freq", "CI_Upper.Freq")],
  by = c("name", "dataset"),
  all.x = TRUE
)


#########################################################################
# 5. Generate Figure E1
#########################################################################

# Copy each candidate's official vote share onto every row of that candidate
# (column new_var) so it can be used as the ordering key for the x-axis.
mayor_combined_data <- mayor_combined_data %>%
  mutate(new_var = ifelse(dataset == "Actual Election Data", percentage, NA)) %>%
  group_by(name) %>%
  # Fill in both directions: with "down" alone the survey rows only received a
  # value when the official row happened to come first within the candidate.
  fill(new_var, .direction = "downup") %>%
  ungroup()

# Order the candidates by their official vote share (ascending).
mayor_combined_data$name <- fct_reorder(
  mayor_combined_data$name,
  mayor_combined_data$new_var
)



# Figure E1 - point estimates with error bars, one series per data source.
# Rows without interval bounds (the official results) get a point only.
figure_e1 <- ggplot(
  mayor_combined_data,
  aes(
    x = name, y = percentage,
    fill = dataset, shape = dataset, color = dataset
  )
) +
  # Points, dodged so the three series sit side by side for each candidate
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  # Error bars without end caps (width = 0), dodged like the points
  geom_errorbar(
    aes(ymin = CI_Lower.Freq, ymax = CI_Upper.Freq),
    position = position_dodge(width = 0.5), width = 0
  ) +
  labs(
    title = "Oakland Mayor Election Results",
    x = "Candidate Name",
    y = "Vote Share (%)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, hjust = 0.5),
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    legend.title = element_blank()
  ) +
  # One shape / colour per dataset
  scale_shape_manual(values = c(17, 16, 15)) +
  scale_color_manual(values = c("darkblue", "darkred", "darkgreen")) +
  scale_fill_manual(values = c("darkblue", "darkred", "darkgreen"))

# Display the plot when the script is run at top level (as before)
figure_e1

# Save the figure; the plot is passed explicitly instead of relying on
# whichever plot was created last.
ggsave("FigureE1.pdf", plot = figure_e1, width = 10, height = 6)


#############################################################################
# END OF THIS R SOURCE FILE
#############################################################################
